# coordinates are 36.17, -115.14

lv_full <- fread("data/Las_Vegas_Solar_Irradiation/LV.csv")

head(lv_full)
##    Year Month Day Hour Minute Dew Point Surface Albedo Wind Speed
## 1: 2000     1   1    0      0        -4          0.169        2.2
## 2: 2000     1   1    0     30        -4          0.169        2.2
## 3: 2000     1   1    1      0        -3          0.169        2.2
## 4: 2000     1   1    1     30        -3          0.169        2.0
## 5: 2000     1   1    2      0        -2          0.169        1.9
## 6: 2000     1   1    2     30        -2          0.169        1.7
##    Relative Humidity Temperature Pressure Wind Direction Precipitable Water
## 1:              55.3           6      940            267               1.03
## 2:              55.3           6      940            267               1.04
## 3:              59.0           6      940            274               1.05
## 4:              59.0           6      940            274               1.05
## 5:              61.4           6      940            278               1.06
## 6:              61.3           6      940            278               1.06
##    Solar Zenith Angle GHI Cloud Type
## 1:                166   0          4
## 2:                164   0          8
## 3:                159   0          4
## 4:                154   0          4
## 5:                148   0          4
## 6:                142   0          1
str(lv_full)
## Classes 'data.table' and 'data.frame':   367920 obs. of  16 variables:
##  $ Year              : int  2000 2000 2000 2000 2000 2000 2000 2000 2000 2000 ...
##  $ Month             : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Day               : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Hour              : int  0 0 1 1 2 2 3 3 4 4 ...
##  $ Minute            : int  0 30 0 30 0 30 0 30 0 30 ...
##  $ Dew Point         : num  -4 -4 -3 -3 -2 -2 -2 -2 -2 -2 ...
##  $ Surface Albedo    : num  0.169 0.169 0.169 0.169 0.169 0.169 0.169 0.169 0.169 0.169 ...
##  $ Wind Speed        : num  2.2 2.2 2.2 2 1.9 1.7 1.5 1.4 1.4 1.3 ...
##  $ Relative Humidity : num  55.3 55.3 59 59 61.4 ...
##  $ Temperature       : num  6 6 6 6 6 6 6 5 5 5 ...
##  $ Pressure          : int  940 940 940 940 940 940 940 940 940 940 ...
##  $ Wind Direction    : num  267 267 274 274 278 ...
##  $ Precipitable Water: num  1.03 1.04 1.05 1.05 1.06 ...
##  $ Solar Zenith Angle: num  166 164 159 154 148 ...
##  $ GHI               : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Cloud Type        : int  4 8 4 4 4 1 4 4 1 4 ...
##  - attr(*, ".internal.selfref")=<externalptr>
lv_full$Date <- make_datetime(year=lv_full$Year, month=lv_full$Month, day=lv_full$Day, hour=lv_full$Hour, min=lv_full$Minute)
lv_full %>% ggplot(aes(x=Date, y=Temperature)) + geom_line(color="blue") 

plots <- NULL

# create a graph for each of the 20 years
temp <- lv_full %>% 
  # average by year-month
  group_by(Year, Month) %>% 
  summarise(Temperature = mean(Temperature, na.rm = TRUE), .groups = "drop") %>% 
  ggplot() +
  geom_line(aes(x = Month, y = Temperature, color = factor(Year))) +
  scale_x_continuous(breaks = 1:12, labels = month.abb, minor_breaks = NULL) +
  labs(title = "Average Temperature by Month", colour = "Year")

dew <- lv_full %>% 
  # average by year-month
  group_by(Year, Month) %>% 
  summarise(`Dew Point` = mean(`Dew Point`, na.rm = TRUE), .groups = "drop") %>% 
  ggplot() +
  geom_line(aes(x = Month, y = `Dew Point`, color = factor(Year))) +
  scale_x_continuous(breaks = 1:12, labels = month.abb, minor_breaks = NULL) +
  labs(title = "Average Dew Point by Month", colour = "Year")

humidity <- lv_full %>% 
  # average by year-month
  group_by(Year, Month) %>% 
  summarise(`Relative Humidity` = mean(`Relative Humidity`, na.rm = TRUE), .groups = "drop") %>% 
  ggplot() +
  geom_line(aes(x = Month, y = `Relative Humidity`, color = factor(Year))) +
  scale_x_continuous(breaks = 1:12, labels = month.abb, minor_breaks = NULL) +
  labs(title = "Average Relative Humidity by Month", colour = "Year")

albedo <- lv_full %>% 
  # average by year-month
  group_by(Year, Month) %>% 
  summarise(`Surface Albedo` = mean(`Surface Albedo`, na.rm = TRUE), .groups = "drop") %>% 
  ggplot() +
  geom_line(aes(x = Month, y = `Surface Albedo`, color = factor(Year))) +
  scale_x_continuous(breaks = 1:12, labels = month.abb, minor_breaks = NULL) +
  labs(title = "Average Surface Albedo by Month", colour = "Year")

wind <- lv_full %>% 
  # average by year-month
  group_by(Year, Month) %>% 
  summarise(`Wind Speed` = mean(`Wind Speed`, na.rm = TRUE), .groups = "drop") %>% 
  ggplot() +
  geom_line(aes(x = Month, y = `Wind Speed`, color = factor(Year))) +
  scale_x_continuous(breaks = 1:12, labels = month.abb, minor_breaks = NULL) +
  labs(title = "Average Wind Speed by Month", colour = "Year")

pressure <- lv_full %>% 
  # average by year-month
  group_by(Year, Month) %>% 
  summarise(`Pressure` = mean(`Pressure`, na.rm = TRUE), .groups = "drop") %>% 
  ggplot() +
  geom_line(aes(x = Month, y = `Pressure`, color = factor(Year))) +
  scale_x_continuous(breaks = 1:12, labels = month.abb, minor_breaks = NULL) +
  labs(title = "Average Pressure by Month", colour = "Year")

windD <- lv_full %>% 
  # average by year-month
  group_by(Year, Month) %>% 
  summarise(`Wind Direction` = mean(`Wind Direction`, na.rm = TRUE), .groups = "drop") %>% 
  ggplot() +
  geom_line(aes(x = Month, y = `Wind Direction`, color = factor(Year))) +
  scale_x_continuous(breaks = 1:12, labels = month.abb, minor_breaks = NULL) +
  labs(title = "Average Wind Direction by Month", colour = "Year")

precip <- lv_full %>% 
  # average by year-month
  group_by(Year, Month) %>% 
  summarise(`Precipitable Water` = mean(`Precipitable Water`, na.rm = TRUE), .groups = "drop") %>% 
  ggplot() +
  geom_line(aes(x = Month, y = `Precipitable Water`, color = factor(Year))) +
  scale_x_continuous(breaks = 1:12, labels = month.abb, minor_breaks = NULL) +
  labs(title = "Average Precipitable Water by Month", colour = "Year")

zenith <- lv_full %>% ggplot(aes(x=`Solar Zenith Angle`)) + geom_histogram(bins=360) + scale_x_continuous(breaks=seq(0,360,30)) + labs(title="Solar Zenith Angle Histogram")

ghi <- lv_full %>% 
  # average by year-month
  group_by(Year, Month) %>% 
  summarise(`GHI` = mean(`GHI`, na.rm = TRUE), .groups = "drop") %>% 
  ggplot() +
  geom_line(aes(x = Month, y = `GHI`, color = factor(Year))) +
  scale_x_continuous(breaks = 1:12, labels = month.abb, minor_breaks = NULL) +
  labs(title = "Average GHI by Month", colour = "Year")

cloud <- lv_full %>% ggplot(aes(x=`Cloud Type`)) + geom_histogram(bins=11) + scale_x_continuous(breaks=seq(0,10,1))+ labs(title="Cloud Type")

vsTemp <- lv_full %>% 
  group_by(Year, Month) %>% 
  summarise(`GHI` = mean(`GHI`, na.rm=TRUE), `Temperature` = mean(`Temperature`, na.rm=TRUE)) %>% 
  ggplot(aes(x=`Temperature`, y=GHI)) + geom_point()
## `summarise()` has grouped output by 'Year'. You can override using the
## `.groups` argument.
vsDew <- lv_full %>% 
  group_by(Year, Month) %>% 
  summarise(`GHI` = mean(`GHI`, na.rm=TRUE), `Dew Point` = mean(`Dew Point`, na.rm=TRUE)) %>% 
  ggplot(aes(x=`Dew Point`, y=GHI)) + geom_point()
## `summarise()` has grouped output by 'Year'. You can override using the
## `.groups` argument.
vsHumid <- lv_full %>% 
  group_by(Year, Month) %>% 
  summarise(`GHI` = mean(`GHI`, na.rm=TRUE), `Relative Humidity` = mean(`Relative Humidity`, na.rm=TRUE)) %>% 
  ggplot(aes(x=`Relative Humidity`, y=GHI)) + geom_point()
## `summarise()` has grouped output by 'Year'. You can override using the
## `.groups` argument.
vsAlbedo <- lv_full %>% 
  group_by(Year, Month) %>% 
  summarise(`GHI` = mean(`GHI`, na.rm=TRUE), `Surface Albedo` = mean(`Surface Albedo`, na.rm=TRUE)) %>% 
  ggplot(aes(x=`Surface Albedo`, y=GHI)) + geom_point()
## `summarise()` has grouped output by 'Year'. You can override using the
## `.groups` argument.
vsWindS <- lv_full %>% 
  group_by(Year, Month) %>% 
  summarise(`GHI` = mean(`GHI`, na.rm=TRUE), `Wind Speed` = mean(`Wind Speed`, na.rm=TRUE)) %>% 
  ggplot(aes(x=`Wind Speed`, y=GHI)) + geom_point()
## `summarise()` has grouped output by 'Year'. You can override using the
## `.groups` argument.
vsPressure <- lv_full %>% 
  group_by(Year, Month) %>% 
  summarise(`GHI` = mean(`GHI`, na.rm=TRUE), `Pressure` = mean(`Pressure`, na.rm=TRUE)) %>% 
  ggplot(aes(x=`Pressure`, y=GHI)) + geom_point()
## `summarise()` has grouped output by 'Year'. You can override using the
## `.groups` argument.
vsWindD <- lv_full %>% 
  group_by(Year, Month) %>% 
  summarise(`GHI` = mean(`GHI`, na.rm=TRUE), `Wind Direction` = mean(`Wind Direction`, na.rm=TRUE)) %>% 
  ggplot(aes(x=`Wind Direction`, y=GHI)) + geom_point()
## `summarise()` has grouped output by 'Year'. You can override using the
## `.groups` argument.
vsPrecip <- lv_full %>% 
  group_by(Year, Month) %>% 
  summarise(`GHI` = mean(`GHI`, na.rm=TRUE), `Precipitable Water` = mean(`Precipitable Water`, na.rm=TRUE)) %>% 
  ggplot(aes(x=`Precipitable Water`, y=GHI)) + geom_point()
## `summarise()` has grouped output by 'Year'. You can override using the
## `.groups` argument.
vsWindD <- lv_full %>% 
  group_by(Year, Month) %>% 
  summarise(`GHI` = mean(`GHI`, na.rm=TRUE), `Wind Direction` = mean(`Wind Direction`, na.rm=TRUE)) %>% 
  ggplot(aes(x=`Wind Direction`, y=GHI)) + geom_point()
## `summarise()` has grouped output by 'Year'. You can override using the
## `.groups` argument.
vsZenith <- lv_full %>%
  group_by(`Solar Zenith Angle`) %>%
  summarise(average_ghi = mean(GHI, na.rm=TRUE)) %>%
  ggplot(aes(x=`Solar Zenith Angle`, y=average_ghi)) + geom_point()

vsCloud <- lv_full %>%
  group_by(`Cloud Type`) %>%
  summarise(average_ghi = mean(GHI, na.rm=TRUE)) %>%
  ggplot(aes(x=`Cloud Type`, y=average_ghi)) + geom_point()

plots <- list(temp, dew, humidity, albedo, wind, pressure, windD, precip, zenith, ghi, cloud)
plots
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

## 
## [[8]]

## 
## [[9]]

## 
## [[10]]

## 
## [[11]]

plotsVs <- list(vsTemp, vsDew, vsHumid, vsAlbedo, vsWindS, vsPressure, vsWindD, vsPrecip, vsZenith, vsCloud)
plotsVs
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

## 
## [[8]]

## 
## [[9]]

## 
## [[10]]

summary(lv_full)
##       Year          Month            Day            Hour           Minute  
##  Min.   :2000   Min.   : 1.00   Min.   : 1.0   Min.   : 0.00   Min.   : 0  
##  1st Qu.:2005   1st Qu.: 4.00   1st Qu.: 8.0   1st Qu.: 5.75   1st Qu.: 0  
##  Median :2010   Median : 7.00   Median :16.0   Median :11.50   Median :15  
##  Mean   :2010   Mean   : 6.53   Mean   :15.7   Mean   :11.50   Mean   :15  
##  3rd Qu.:2015   3rd Qu.:10.00   3rd Qu.:23.0   3rd Qu.:17.25   3rd Qu.:30  
##  Max.   :2020   Max.   :12.00   Max.   :31.0   Max.   :23.00   Max.   :30  
##    Dew Point      Surface Albedo    Wind Speed    Relative Humidity
##  Min.   :-27.00   Min.   :0.146   Min.   : 0.00   Min.   :  2.2    
##  1st Qu.: -5.00   1st Qu.:0.165   1st Qu.: 1.30   1st Qu.: 17.2    
##  Median : -1.00   Median :0.173   Median : 2.00   Median : 29.1    
##  Mean   : -0.93   Mean   :0.176   Mean   : 2.58   Mean   : 34.2    
##  3rd Qu.:  3.00   3rd Qu.:0.184   3rd Qu.: 3.50   3rd Qu.: 46.5    
##  Max.   : 23.00   Max.   :0.870   Max.   :13.40   Max.   :100.0    
##   Temperature      Pressure   Wind Direction Precipitable Water
##  Min.   :-5.0   Min.   :910   Min.   :  0    Min.   :0.10      
##  1st Qu.:11.0   1st Qu.:940   1st Qu.: 93    1st Qu.:0.77      
##  Median :19.0   Median :940   Median :209    Median :1.10      
##  Mean   :19.9   Mean   :940   Mean   :186    Mean   :1.36      
##  3rd Qu.:28.0   3rd Qu.:940   3rd Qu.:252    3rd Qu.:1.64      
##  Max.   :47.0   Max.   :960   Max.   :360    Max.   :6.80      
##  Solar Zenith Angle      GHI         Cloud Type   
##  Min.   : 13.0      Min.   :   0   Min.   : 0.00  
##  1st Qu.: 59.2      1st Qu.:   0   1st Qu.: 0.00  
##  Median : 89.8      Median :   0   Median : 0.00  
##  Mean   : 89.7      Mean   : 234   Mean   : 1.92  
##  3rd Qu.:120.4      3rd Qu.: 460   3rd Qu.: 4.00  
##  Max.   :167.1      Max.   :1084   Max.   :10.00  
##       Date                       
##  Min.   :2000-01-01 00:00:00.00  
##  1st Qu.:2005-04-02 05:52:30.00  
##  Median :2010-07-02 11:45:00.00  
##  Mean   :2010-07-02 14:04:12.55  
##  3rd Qu.:2015-10-01 17:37:30.00  
##  Max.   :2020-12-31 23:30:00.00
#names(lv_full)
lv_full$Zenith_Bins <- cut(lv_full$`Solar Zenith Angle`, breaks = seq(0, 180, by = 10))

# Plotting irradiance vs zenith angle
# The gray dots are outliers of solar irradiance, likely caused by clouds
ggplot(data = lv_full, aes(x = Zenith_Bins, y = GHI)) +
  geom_boxplot(outlier.color = "gray", size = 0.5) +
  labs(x = "Solar Zenith Angle (degrees)", y = "Global Horizontal Irradiance (W/m²)",
       title = "Distribution of GHI for Different Solar Zenith Angle (Grouped by bins of 10 degrees)") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# plotting solar zenith against time of day
ggplot(data = lv_full, aes(x = Hour, y = `Solar Zenith Angle`, group = interaction(Month, Day), color = factor(Month))) +
  geom_line() +
  labs(x = "Time of Day (Hour)", y = "Solar Zenith Angle (degrees)",
       title = "Solar Zenith Angle by Time of Day (Spaghetti Plot)") +
  scale_color_discrete(name = "Month", labels = c("January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December")) +
  theme_minimal() +
  theme(legend.position = "top", axis.text.x = element_text(angle = 45, hjust = 1))

# shows the difference between solstices, and their effect on solar zenith angle
filtered_data <- lv_full %>%
  filter(Month %in% c(6, 12))
ggplot(data = filtered_data, aes(x = Hour, y = `Solar Zenith Angle`, group = interaction(Month, Day), color = factor(Month))) +
  geom_line() +
  labs(x = "Time of Day (Hour)", y = "Solar Zenith Angle (degrees)",
       title = "Solar Zenith Angle by Time of Day (June and December Only)") +
  scale_color_discrete(name = "Month", labels = c("June", "December")) +
  theme_minimal() +
  theme(legend.position = "top", axis.text.x = element_text(angle = 45, hjust = 1))

#plotting how solar irradiance is affected by time of month
lv_full$Month <- factor(lv_full$Month, levels = 1:12, labels = c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"))
agg_data <- lv_full %>%
  group_by(Month, Hour) %>%
  summarize(Avg_GHI = mean(GHI))
## `summarise()` has grouped output by 'Month'. You can override using the
## `.groups` argument.
ggplot(data = lv_full, aes(x = Hour, y = GHI, group = interaction(Month, Hour), color = Month)) +
  geom_line(size = 1.5, alpha = 0.7) +  # Set the size to 1.5 (adjust as needed)
  labs(x = "Hour of the Day", y = "Average Global Horizontal Irradiance (W/m²)",
       title = "Average Solar Irradiance by Hour for Each Month (Spaghetti Plot)") +
  scale_x_continuous(breaks = seq(0, 23, by = 1)) +
  theme_minimal() +
  theme(legend.position = "top", axis.text.x = element_text(angle = 45, hjust = 1))
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# num_unique_clouds <- lv_full %>%
#   distinct(Cloud_Type) %>%
#   nrow()
lv_full <- lv_full %>%
  rename(Cloud_Type = `Cloud Type`)
lv_full$Cloud_Type <- factor (lv_full$Cloud_Type)
# Effect of clouds on GHI
ggplot(data = lv_full, aes(x = Cloud_Type, y = GHI)) +
  stat_summary(data = subset(lv_full, `Solar Zenith Angle` >= 0 & `Solar Zenith Angle` <= 80),
               fun = "mean", geom = "bar", fill = "skyblue", color = "black") +
  labs(x = "Cloud_Type", y = "Mean Global Horizontal Irradiance (W/m²)",
       title = "Effect of Cloud Type on Irradiance (Solar Zenith: 0-80 degrees)") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

names(lv_full)
##  [1] "Year"               "Month"              "Day"               
##  [4] "Hour"               "Minute"             "Dew Point"         
##  [7] "Surface Albedo"     "Wind Speed"         "Relative Humidity" 
## [10] "Temperature"        "Pressure"           "Wind Direction"    
## [13] "Precipitable Water" "Solar Zenith Angle" "GHI"               
## [16] "Cloud_Type"         "Date"               "Zenith_Bins"
# temperature vs irradiance boxplot
ggplot(data = subset(lv_full, `Solar Zenith Angle` >= 0 & `Solar Zenith Angle` <= 80),
       aes(x = cut(Temperature, breaks = seq(min(Temperature), max(Temperature) + 2, by = 2)), y = GHI)) +
  geom_boxplot() +
  labs(x = "Temperature (°C)", y = "Global Horizontal Irradiance (W/m²)",
       title = "Temperature vs. Global Horizontal Irradiance (Solar Zenith: 0-80 degrees)") +
  theme_minimal()

#Surface Albedo vs Irradiance for sunlight hours
filtered_data <- lv_full %>%
  filter(`Solar Zenith Angle` >= 0 & `Solar Zenith Angle` <= 80,
         `Surface Albedo` >= 0.10 & `Surface Albedo` <= 0.25)
filtered_data <- filtered_data %>%
  mutate(Albedo_Bin = cut(`Surface Albedo`, breaks = seq(0.10, 0.25, by = 0.005)),)
ggplot(data = filtered_data, aes(x = Albedo_Bin, y = GHI)) +
  geom_boxplot(outlier.color = "gray", size = 0.5) +
  labs(x = "Surface Albedo", y = "Global Horizontal Irradiance (W/m²)",
       title = "Distribution of GHI for Different Surface Albedo (Grouped by bins of 0.005)\n(Zenith: 0-80 degrees)") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

#Relative humidity vs Solar Irradiance for zentih 0-80
filtered_data <- filtered_data %>%
  mutate(RelHumidity_Bin = cut(`Relative Humidity`, breaks = seq(0, 100, by = 2.5)))
ggplot(data = filtered_data, aes(x = RelHumidity_Bin, y = GHI)) +
  geom_boxplot(fill = "skyblue", color = "black") +
  labs(x = "Relative Humidity (%)", y = "Global Horizontal Irradiance (W/m²)",
       title = "GHI vs. Relative Humidity (Zenith: 0-80 degrees, Bin Size: 2.5%)") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

#Precipitable Water vs GHI (for hours during sunlight)
filtered_data <- filtered_data %>%
  mutate(PrecipitableWater_Bin = cut(`Precipitable Water`, breaks = seq(min(`Precipitable Water`), max(`Precipitable Water`), by = 0.15)))
ggplot(data = filtered_data, aes(x = PrecipitableWater_Bin, y = GHI)) +
  geom_boxplot(fill = "skyblue", color = "black") +
  labs(x = "Precipitable Water (Bin Size: 0.05)", y = "Global Horizontal Irradiance (W/m²)",
       title = "GHI vs. Precipitable Water (Zenith: 0-80 degrees)") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))